/******************************************************************************
* build medical/hospital related covariates
* medicare provider of service (POS) from CMS -> location and type of hospital
* CDC mortality due to prescription opioid poisoning
*****************************************************************************/

clear all
set more off
global proj "/Users/hanyang/tu2"
global dataRoot "${proj}/Data"



*********************************************
** 2014 POS
*********************************************

import delimited "$dataRoot/Raw/POS_OTHER_DEC14.csv", clear

*just keep hospitals, (not SNF, HHA, etc...)
keep if prvdr_ctgry_cd==1 & inlist(prvdr_ctgry_sbtyp_cd, 1, 11)


keep zip_cd fips_state_cd fips_cnty_cd gnrl_cntl_type_cd elgblty_sw fac_name st_adr ///
	crtfd_bed_cnt physn_cnt rn_cnt ocptnl_thrpst_cnt 


gen hospital_fp  = (inlist(gnrl_cntl_type_cd, "04", "09"))
gen hospital_np  = (inlist(gnrl_cntl_type_cd, "01", "02"))
gen hospital_gov = (inlist(gnrl_cntl_type_cd, "05", "06", "07", "08"))

gen bed_fp  = crtfd_bed_cnt if (inlist(gnrl_cntl_type_cd, "04", "09"))
gen bed_np  = crtfd_bed_cnt if (inlist(gnrl_cntl_type_cd, "01", "02"))
gen bed_gov = crtfd_bed_cnt if (inlist(gnrl_cntl_type_cd, "05", "06", "07", "08"))


*** crosswalk to cz
gen county_id = fips_state_cd*1000 + fips_cnty_cd
merge m:1 county_id using "$dataRoot/Crosswalk/cw_cty00_cz.dta"
keep if _merge==3
drop _merge


*** collapse at cz
collapse (count) n_hospital=hospital_fp (sum) crtfd_bed_cnt hospital_* bed_fp bed_np bed_gov, by(cz czname state_id stateabbrv) fast

*fp vs no fraction
replace hospital_fp  = 100*hospital_fp/n_hospital
replace hospital_np  = 100*hospital_np/n_hospital
replace hospital_gov = 100*hospital_gov/n_hospital

*fp vs no fraction of bed
replace bed_fp  = 100*bed_fp/crtfd_bed_cnt
replace bed_np  = 100*bed_np/crtfd_bed_cnt
replace bed_gov = 100*bed_gov/crtfd_bed_cnt


foreach var of varlist n_hospital crtfd_bed_cnt{
	replace `var'=log(`var'+1)
}
*label vars
label var hospital_fp  "share of for-profit hospitals"
label var hospital_np  "share of non-profit private hospitals"
label var hospital_gov "share of government hospitals"

label var bed_fp  "share of for-profit beds"
label var bed_np  "share of non-profit private beds"
label var bed_gov "share of government beds"


label var crtfd_bed_cnt 	 "ln bed count (certified)"
label var n_hospital    	 "ln number of hospitals"

***  Save
compress
save "$dataRoot/Clean/TU_Geo_Debt_Hospital_2014", replace




*********************************************
** 2014 CDC mortality due to prescription opioid poisoning
*********************************************


**** load state level
import delimited "$dataRoot/Raw/CDC_prescription_opioid_poison_01_15_state.txt", clear
drop if death==.
drop notes state cruderate
rename statecode state
rename deaths deaths_state
compress
save "$dataRoot/Raw/CDC_prescription_opioid_poison_01_15_state", replace



**** load county level
import delimited "$dataRoot/Raw/CDC_prescription_opioid_poison_01_15.txt", clear

drop if deaths==.
sum deaths if countycode!=.
local sum1 = `r(sum)'
sum deaths
local sum2 = `r(max)'

global make_up = `sum2' - `sum1'

drop if countycode==.
drop notes cruderate county


**** merge with county population and square mile
rename countycode county
merge 1:1 county using $dataRoot/Crosswalk/county_population_area.dta
rename _merge _mergeCDCcounty


**** merge with CDC state level data
merge m:1 state using "$dataRoot/Raw/CDC_prescription_opioid_poison_01_15_state"
keep if _merge ==3
drop _merge


*fill county with missing data due to top coding rule (3 of dealth <10) 
gen pop_miss = pop10 if deaths==.
bysort state: egen deaths_county_sum = sum(deaths)
bysort state: egen pop_state = sum(pop10)
bysort state: egen pop_miss_state = sum(pop_miss)

*missing number of death at state level, and the population of counties with missing death
gen miss_death = deaths_state - deaths_county_sum 
gen pop_miss_shr = pop10/pop_miss_state if deaths==.

*assign missing death pro rata, at state level (note that some county will get a number > 10)
replace deaths = miss_death*pop_miss_shr if deaths==.



*** merge with county to cz
rename county county_id
merge m:1 county_id using "$dataRoot/Crosswalk/cw_cty00_cz.dta"
keep if _merge==3
drop _merge


*** collapse at czalt level
collapse (sum) pop10 deaths, by(cz czname state_id stateabbrv) fast
		
gen overdose = 1000000*deaths/pop10
label var overdose "Prescription Opioid Poison"


*merge with previous hospital data
merge 1:1 cz using "$dataRoot/Clean/TU_Geo_Debt_Hospital_2014"

drop _merge

*overwrite missing hospital related variables
foreach var of varlist n_hospital *_cnt hospital_* bed_*{
	replace `var'=0 if `var'==.
}

***  Save
compress
save "$dataRoot/Clean/TU_Geo_Debt_Hospital_2014", replace






